#encoding=utf8
import os,REdealText

class loadFolder(object):
	"""Iterable over the immediate sub-directories of a root directory.

	Iterating yields, for every entry of ``path`` that is a directory, that
	entry's name joined onto ``path``. Entries that are not directories are
	skipped. The directory listing is read when iteration starts, so each
	new ``for`` loop over the same instance re-reads the directory.
	"""

	def __init__(self, path):
		"""Remember the root directory to scan; nothing is read from disk here."""
		super(loadFolder, self).__init__()
		self.path = path

	def __iter__(self):
		"""Yield the joined path of each directory found directly under ``self.path``."""
		# Lazily pair every listed name with the root directory.
		candidates = (
			os.path.join(self.path, name) for name in os.listdir(self.path)
		)
		for candidate in candidates:
			if not os.path.isdir(candidate):
				continue  # plain files and other non-directories are ignored
			yield candidate


class loadFiles(object):
	"""Iterable over ``(category, entry_name)`` pairs found under a root directory.

	For every sub-directory that ``loadFolder(path)`` yields, each name
	returned by ``os.listdir`` on that sub-directory is paired with the
	sub-directory's own name (the category). Entries are not filtered, so
	nested directories inside a category are yielded as well.
	"""

	def __init__(self, path):
		"""Remember the root directory; nothing is read from disk here."""
		super(loadFiles, self).__init__()
		self.path = path

	def __iter__(self):
		"""Yield a ``(category, entry_name)`` tuple for each entry of each sub-directory."""
		for folder_path in loadFolder(self.path):
			# The category is whatever follows the last os.sep in the folder path.
			category = folder_path.rsplit(os.sep, 1)[-1]
			for entry_name in os.listdir(folder_path):
				yield category, entry_name


if __name__ == '__main__':
	# Manual smoke test: read, parse and print one file from the corpus
	# directory, then stop.
	path = os.path.abspath(r'../CSCMNews')
	for i, obj in enumerate(loadFiles(path)):
		if i % 10000 != 0:
			continue
		catg, name = obj
		file_path = os.path.join(path, catg, name)
		# readFile/textParse come from the project module REdealText; presumably
		# they load the raw text and clean it -- confirm against that module.
		file_txt = REdealText.textParse(REdealText.readFile(file_path))
		print(file_txt)
		# Stop after the first match, so only the item at index 0 is ever shown.
		break